#' Compare data frames columns before merging
#'
#' Generate a comparison of data.frames (or similar objects) that indicates if
#' they will successfully bind together by rows.
#'
#' @details Due to the returned "column_name" column, no input data.frame may be
#'   named "column_name".
#'
#'   The `strict_description` argument is most typically used to understand
#'   if factor levels match or are bindable.  Factors are typically bindable,
#'   but the behavior of what happens when they bind differs based on the
#'   binding method ("bind_rows" or "rbind").  Even when
#'   `strict_description` is `FALSE`, data.frames may still bind
#'   because some classes (like factors and characters) can bind even if they
#'   appear to differ.
#'
#' @param ... A combination of data.frames, tibbles, and lists of
#'   data.frames/tibbles.  The values may optionally be named arguments; if
#'   named, the output column will be the name; if not named, the output column
#'   will be the data.frame name (see examples section).
#' @param return Should a summary of "all" columns be returned, only return
#'   "match"ing columns, or only "mismatch"ing columns?
#' @param bind_method What method of binding should be used to determine
#'   matches? With "bind_rows", columns missing from a data.frame would be
#'   considered a match (as in `dplyr::bind_rows()`; with "rbind", columns
#'   missing from a data.frame would be considered a mismatch (as in
#'   `base::rbind()`.
#' @param strict_description Passed to `describe_class`.  Also, see the
#'   Details section.
#' @return A data.frame with a column named "column_name" with a value named
#'   after the input data.frames' column names, and then one column per
#'   data.frame (named after the input data.frame).  If more than one input has
#'   the same column name, the column naming will have suffixes defined by
#'   sequential use of `base::merge()` and may differ from expected naming.
#'   The rows within the data.frame-named columns are descriptions of the
#'   classes of the data within the columns (generated by
#'   `describe_class`).
#' @examples
#' compare_df_cols(data.frame(A = 1), data.frame(B = 2))
#' # user-defined names
#' compare_df_cols(dfA = data.frame(A = 1), dfB = data.frame(B = 2))
#' # a combination of list and data.frame input
#' compare_df_cols(listA = list(dfA = data.frame(A = 1), dfB = data.frame(B = 2)), data.frame(A = 3))
#' @family data frame type comparison
#' @export
compare_df_cols <- function(..., return = c("all", "match", "mismatch"), bind_method = c("bind_rows", "rbind"), strict_description = FALSE) {
  # Input checking
  return <- match.arg(return)
  bind_method <- match.arg(bind_method)
  args <- list(...)
  mask_input_data_frame <- sapply(X = args, FUN = is.data.frame)
  mask_input_list <- sapply(X = args, FUN = is.list) & !mask_input_data_frame
  mask_input_other <- !(mask_input_data_frame | mask_input_list)
  if (any(mask_input_other)) {
    stop(
      "Input given with `...` must be either a data.frame or a list of data.frames. Argument ",
      # the `collapse` argument is required for msg1 to prevent an ngettext
      # error; the input must be scalar.
      ngettext(
        sum(mask_input_other),
        msg1 = paste("number", which(mask_input_other), "is not.", collapse = "\n"),
        msg2 = paste("numbers", paste(which(mask_input_other), collapse = ", "), "are not.")
      )
    )
  }
  bad_list_inputs <- numeric(0)
  for (idx in which(mask_input_list)) {
    bad_list_inputs <-
      c(
        bad_list_inputs,
        if (!all(sapply(X = args[[idx]], FUN = is.data.frame))) {
          idx
        } else {
          numeric(0)
        }
      )
  }
  if (length(bad_list_inputs)) {
    stop(
      "List inputs must be lists of data.frames.  List input ",
      if (length(bad_list_inputs) == 1) {
        paste("number", bad_list_inputs, "is not a list of data.frames.")
      } else if (length(bad_list_inputs) < 6) {
        paste("numbers", paste(bad_list_inputs, collapse = ", "), "are not lists of data.frames.")
      } else {
        paste("numbers", paste(c(bad_list_inputs[1:5], "..."), collapse = ", "), "are not lists of data.frames.")
      }
    )
  }

  # Generate and check column names
  direct_names <- names(args)
  indirect_names <- as.character(match.call(expand.dots = TRUE))
  indirect_names <- indirect_names[!(indirect_names %in% as.character(match.call(expand.dots = FALSE)))]
  if (is.null(direct_names)) {
    final_names <- indirect_names
  } else {
    final_names <- direct_names
    mask_replace <- final_names %in% ""
    final_names[mask_replace] <- indirect_names[mask_replace]
  }
  final_names <- as.list(final_names)
  for (idx in which(mask_input_list)) {
    current_list_names <- names(args[[idx]])
    final_names[[idx]] <-
      if (is.null(current_list_names)) {
        paste(final_names[[idx]], seq_along(args[[idx]]), sep = "_")
      } else if (any(mask_unnamed_list <- current_list_names %in% "")) {
        current_list_names[mask_unnamed_list] <-
          paste(
            final_names[[idx]][mask_unnamed_list],
            seq_len(sum(mask_unnamed_list)),
            sep = "_"
          )
        current_list_names
      } else {
        current_list_names
      }
  }
  if (any(unlist(final_names) %in% "column_name")) {
    stop("None of the input ... argument names or list names may be `column_name`.")
  }
  ret <- compare_df_cols_df_maker(args, class_colname = final_names, strict_description = strict_description)
  if (return == "all" | ncol(ret) == 2) {
    if (return != "all") {
      warning("Only one data.frame provided, so all its classes are provided.")
    }
    rownames(ret) <- NULL
    ret
  } else {
    # Choose which way to test if the rows are bindable (NA matches or not).
    bind_method_fun <-
      list(
        rbind = function(idx) {
          all(unlist(ret[idx, 3:ncol(ret)]) %in% ret[idx, 2])
        },
        bind_rows = function(idx) {
          all(
            unlist(ret[idx, 3:ncol(ret)]) %in%
              c(
                NA_character_,
                na.omit(unlist(ret[idx, 2:ncol(ret)]))[1]
              )
          )
        }
      )
    mask_match <-
      sapply(
        X = seq_len(nrow(ret)),
        FUN = bind_method_fun[[bind_method]]
      )
    ret <-
      if (return == "match") {
        ret[mask_match, ]
      } else if (return == "mismatch") {
        ret[!mask_match, ]
      }
    rownames(ret) <- NULL
    ret
  }
}

#' This is the workhorse for making a data.frame description used by
#' compare_df_cols
#' @param x The data.frame or list of data.frames
#' @param class_colname The name for the column-name-defining column
#' @param strict_description Passed to `describe_class`
#' @return A 2-column data.frame with the first column naming all the columns of
#'   `x` and the second column (named after the value in
#'   `class_colname`) defining the classes using
#'   `describe_class()`.
#' @noRd
compare_df_cols_df_maker <- function(x, class_colname = "class", strict_description) {
  UseMethod("compare_df_cols_df_maker")
}

#' @exportS3Method NULL
compare_df_cols_df_maker.data.frame <- function(x, class_colname = "class", strict_description) {
  if (class_colname == "column_name") {
    stop('`class_colname` cannot be "column_name"')
  }
  if (ncol(x) == 0) {
    warning(class_colname, " has zero columns and will not appear in output.")
    ret <- data.frame(column_name = character(0), stringsAsFactors = FALSE)
  } else {
    ret <-
      data.frame(
        column_name = names(x),
        X = sapply(X = x, FUN = describe_class, strict_description = strict_description),
        stringsAsFactors = FALSE
      )
    names(ret)[2] <- class_colname
  }
  ret
}

#' @exportS3Method NULL
compare_df_cols_df_maker.list <- function(x, class_colname = "class", strict_description = strict_description) {
  if (length(class_colname) != length(x)) {
    stop("`x` and `class_colname` must be the same length.")
  } else if (any(class_colname == "column_name")) {
    stop('`class_colname` cannot be "column_name"')
  }
  ret <-
    lapply(
      X = seq_along(x),
      FUN = function(idx) {
        compare_df_cols_df_maker(
          x = x[[idx]],
          class_colname = class_colname[[idx]],
          strict_description = strict_description
        )
      }
    )
  Reduce(f = function(x, y) {
    merge(x, y, by = "column_name", all = TRUE)
  }, x = ret)
}

#' Do the the data.frames have the same columns & types?
#'
#' Check whether a set of data.frames are row-bindable. Calls `compare_df_cols()`
#' and returns `TRUE` if there are no mis-matching rows.
#'
#' @inheritParams compare_df_cols
#' @param verbose Print the mismatching columns if binding will fail.
#' @return `TRUE` if row binding will succeed or `FALSE` if it will fail.
#' @family data frame type comparison
#' @examples
#' compare_df_cols_same(data.frame(A = 1), data.frame(A = 2))
#' compare_df_cols_same(data.frame(A = 1), data.frame(B = 2))
#' compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), verbose = FALSE)
#' compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), bind_method = "rbind")
#' @export
compare_df_cols_same <- function(..., bind_method = c("bind_rows", "rbind"), verbose = TRUE) {
  bind_method <- match.arg(bind_method)
  ret <- compare_df_cols(..., return = "mismatch", bind_method = bind_method)
  if (nrow(ret) & verbose) {
    print(ret)
  }
  nrow(ret) == 0
}

#' Describe the class(es) of an object
#'
#' @details For package developers, an S3 generic method can be written for
#'   `describe_class()` for custom classes that may need more definition
#'   than the default method.  This function is called by [compare_df_cols()].
#'
#' @param x The object to describe
#' @param strict_description Should differing factor levels be treated
#'   as differences for the purposes of identifying mismatches?
#'   `strict_description = TRUE` is stricter and factors with different
#'   levels will be treated as different classes.  `FALSE` is more
#'   lenient: for class comparison purposes, the variable is just a "factor".
#' @return A character scalar describing the class(es) of an object where if the
#'   scalar will match, columns in a data.frame (or similar object) should bind
#'   together without issue.
#' @family data frame type comparison
#' @examples
#' describe_class(1)
#' describe_class(factor("A"))
#' describe_class(ordered(c("A", "B")))
#' describe_class(ordered(c("A", "B")), strict_description = FALSE)
#' @export
describe_class <- function(x, strict_description = TRUE) {
  UseMethod("describe_class")
}

#' @describeIn describe_class Describe factors with their levels
#'   and if they are ordered.
#' @export
describe_class.factor <- function(x, strict_description = TRUE) {
  if (strict_description) {
    all_classes <- class(x)
    all_levels <- levels(x)
    level_text <- sprintf("levels=c(%s)", paste('"', levels(x), '"', sep = "", collapse = ", "))
    factor_text <- sprintf("factor(%s)", level_text)
    mask_factor <- class(x) == "factor"
    all_classes[mask_factor] <- factor_text
    paste(all_classes, collapse = ", ")
  } else {
    all_classes <- setdiff(class(x), "ordered")
    paste(all_classes, collapse = ", ")
  }
}

#' @describeIn describe_class List all classes of an object.
#' @export
describe_class.default <- function(x, strict_description = TRUE) {
  all_classes <- class(x)
  paste(all_classes, collapse = ", ")
}
